DATA1901 Project 1 Group 5 Submission

Code
suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(plotly))
suppressPackageStartupMessages(library(fmsb))

# Load the pooled participant sheet from the project workbook.
data <- read_excel("Project_1_Data.xlsx", sheet = "pooled123")

# Keep only the columns this report works with, then retain the participants
# who did not undergo a social modelling condition (sm == "NO_SM").
filteredData <- data %>%
  select(Date, PID, BSSQ_1:BSSQ_16, ASSQ_1:ASSQ_16, age, VRexperience, ssq_full, gender, sm) %>%
  filter(sm == "NO_SM")

# Per-symptom change in SSQ rating: active (ASSQ) minus baseline (BSSQ).
# One d_* column is appended for each of the 16 questionnaire items.
filteredData <- filteredData %>%
  mutate(
    d_discomfort = ASSQ_1 - BSSQ_1,
    d_fatigue = ASSQ_2 - BSSQ_2,
    d_headache = ASSQ_3 - BSSQ_3,
    d_eyestrain = ASSQ_4 - BSSQ_4,
    d_difficulty_focusing = ASSQ_5 - BSSQ_5,
    d_salivation = ASSQ_6 - BSSQ_6,
    d_sweating = ASSQ_7 - BSSQ_7,
    d_nausea = ASSQ_8 - BSSQ_8,
    d_difficulty_concentrating = ASSQ_9 - BSSQ_9,
    d_fullness_of_head = ASSQ_10 - BSSQ_10,
    d_blurred_vision = ASSQ_11 - BSSQ_11,
    d_dizziness_o = ASSQ_12 - BSSQ_12,
    d_dizziness_c = ASSQ_13 - BSSQ_13,
    d_vertigo = ASSQ_14 - BSSQ_14,
    d_stomach_awareness = ASSQ_15 - BSSQ_15,
    d_burping = ASSQ_16 - BSSQ_16
  )

# Store VR experience as a factor (the column arrives as character) and bin
# each participant's age into an age-group factor. An age that satisfies none
# of the conditions below gets NA from case_when().
filteredData <- filteredData %>%
  mutate(
    VRexperience = as.factor(VRexperience),
    age_group = as.factor(case_when(
      age >= 16 & age <= 21 ~ "16 to 21",
      age >= 22 & age <= 29 ~ "22 to 29",
      age >= 30 & age <= 37 ~ "30 to 37",
      age >= 38 & age <= 45 ~ "38 to 45",
      age > 45 ~ "above 45"
    ))
  )

# Give the numbered SSQ item columns descriptive names. Item i of the baseline
# (BSSQ_i) and active (ASSQ_i) questionnaires is renamed after the i-th
# symptom listed here; a numbered column that is not present is skipped.
ssq_symptoms <- c(
  "discomfort", "fatigue", "headache", "eyestrain", "difficulty_focusing",
  "salivation", "sweating", "nausea", "difficulty_concentrating",
  "fullness_of_head", "blurred_vision", "dizziness_o", "dizziness_c",
  "vertigo", "stomach_awareness", "burping"
)

for (prefix in c("BSSQ_", "ASSQ_")) {
  numbered <- paste0(prefix, seq_along(ssq_symptoms))
  descriptive <- paste0(prefix, ssq_symptoms)
  hits <- match(numbered, names(filteredData))
  found <- !is.na(hits)
  names(filteredData)[hits[found]] <- descriptive[found]
}


# Read the data dictionary sheet from the same workbook.
data_dict <- read_excel("Project_1_Data.xlsx", sheet = "data_dictionary")

# Split the participants by their recorded VR experience ("Yes" / "No").
withVRexperience <- filteredData %>% filter(VRexperience == "Yes")
noVRexperience <- filteredData %>% filter(VRexperience == "No")

# Mean change (active minus baseline) in each symptom for participants with
# VR experience. The means are computed in one pass over the d_* columns and
# then exposed under the individual yes_avg_d_* names used by the plots.
yes_delta_means <- vapply(
  withVRexperience[c(
    "d_discomfort", "d_fatigue", "d_headache", "d_eyestrain",
    "d_difficulty_focusing", "d_salivation", "d_sweating", "d_nausea",
    "d_difficulty_concentrating", "d_fullness_of_head", "d_blurred_vision",
    "d_dizziness_o", "d_dizziness_c", "d_vertigo", "d_stomach_awareness",
    "d_burping"
  )],
  mean,
  numeric(1)
)

yes_avg_d_discomfort <- yes_delta_means[["d_discomfort"]]
yes_avg_d_fatigue <- yes_delta_means[["d_fatigue"]]
yes_avg_d_headache <- yes_delta_means[["d_headache"]]
yes_avg_d_eyestrain <- yes_delta_means[["d_eyestrain"]]
yes_avg_d_difficulty_focusing <- yes_delta_means[["d_difficulty_focusing"]]
yes_avg_d_salivation <- yes_delta_means[["d_salivation"]]
yes_avg_d_sweating <- yes_delta_means[["d_sweating"]]
yes_avg_d_nausea <- yes_delta_means[["d_nausea"]]
yes_avg_d_difficulty_concentrating <- yes_delta_means[["d_difficulty_concentrating"]]
yes_avg_d_fullness <- yes_delta_means[["d_fullness_of_head"]]
yes_avg_d_vision <- yes_delta_means[["d_blurred_vision"]]
yes_avg_d_dizziness_o <- yes_delta_means[["d_dizziness_o"]]
yes_avg_d_dizziness_c <- yes_delta_means[["d_dizziness_c"]]
yes_avg_d_vertigo <- yes_delta_means[["d_vertigo"]]
yes_avg_d_stomach <- yes_delta_means[["d_stomach_awareness"]]
yes_avg_d_burping <- yes_delta_means[["d_burping"]]

# Mean change (active minus baseline) in each symptom for participants without
# VR experience. The means are computed in one pass over the d_* columns and
# then exposed under the individual no_avg_d_* names used by the plots.
no_delta_means <- vapply(
  noVRexperience[c(
    "d_discomfort", "d_fatigue", "d_headache", "d_eyestrain",
    "d_difficulty_focusing", "d_salivation", "d_sweating", "d_nausea",
    "d_difficulty_concentrating", "d_fullness_of_head", "d_blurred_vision",
    "d_dizziness_o", "d_dizziness_c", "d_vertigo", "d_stomach_awareness",
    "d_burping"
  )],
  mean,
  numeric(1)
)

no_avg_d_discomfort <- no_delta_means[["d_discomfort"]]
no_avg_d_fatigue <- no_delta_means[["d_fatigue"]]
no_avg_d_headache <- no_delta_means[["d_headache"]]
no_avg_d_eyestrain <- no_delta_means[["d_eyestrain"]]
no_avg_d_difficulty_focusing <- no_delta_means[["d_difficulty_focusing"]]
no_avg_d_salivation <- no_delta_means[["d_salivation"]]
no_avg_d_sweating <- no_delta_means[["d_sweating"]]
no_avg_d_nausea <- no_delta_means[["d_nausea"]]
no_avg_d_difficulty_concentrating <- no_delta_means[["d_difficulty_concentrating"]]
no_avg_d_fullness <- no_delta_means[["d_fullness_of_head"]]
no_avg_d_vision <- no_delta_means[["d_blurred_vision"]]
no_avg_d_dizziness_o <- no_delta_means[["d_dizziness_o"]]
no_avg_d_dizziness_c <- no_delta_means[["d_dizziness_c"]]
no_avg_d_vertigo <- no_delta_means[["d_vertigo"]]
no_avg_d_stomach <- no_delta_means[["d_stomach_awareness"]]
no_avg_d_burping <- no_delta_means[["d_burping"]]


# Mean full SSQ score for each age group.
# The group labels are written once and reused for both the filters and the
# summary table, so data_by_age_group$age_group always agrees with the values
# of filteredData$age_group (including the lower-case "above 45").
age_group_labels <- c("16 to 21", "22 to 29", "30 to 37", "38 to 45", "above 45")

grp1 <- filter(filteredData, age_group == age_group_labels[1])
grp2 <- filter(filteredData, age_group == age_group_labels[2])
grp3 <- filter(filteredData, age_group == age_group_labels[3])
grp4 <- filter(filteredData, age_group == age_group_labels[4])
grp5 <- filter(filteredData, age_group == age_group_labels[5])

# A group with no participants has a mean of NaN.
mean_sqq_ages <- vapply(
  list(grp1, grp2, grp3, grp4, grp5),
  function(grp) mean(grp$ssq_full),
  numeric(1)
)

data_by_age_group <- data.frame(
  age_group = age_group_labels,
  mean_ssq = mean_sqq_ages
)

# Label each participant's generation from an approximate birth year, taken
# as the year of the Date column minus the participant's age.
# A missing age or date gives NA instead of falling through to "Old".
birth_year <- year(filteredData$Date) - filteredData$age
filteredData <- mutate(
  filteredData,
  generation = case_when(
    is.na(birth_year) ~ NA_character_,
    birth_year >= 2010 ~ "a",
    birth_year >= 1997 ~ "Z",
    birth_year >= 1981 ~ "Y",
    birth_year >= 1965 ~ "X",
    TRUE ~ "Old"
  )
)

# Mean and standard deviation of the full SSQ score within each
# VR-experience group.
ssq_full_yes <- withVRexperience[["ssq_full"]]
ssq_full_no <- noVRexperience[["ssq_full"]]

mean_ssq_yes <- mean(ssq_full_yes)
mean_ssq_no <- mean(ssq_full_no)

sd_ssq_yes <- sd(ssq_full_yes)
sd_ssq_no <- sd(ssq_full_no)

Summary of Findings

We found that individuals with prior VR experience reported heightened symptoms of motion sickness when exposed to VR, compared to those without experience. Age and gender might have had some effect on this observation. Further research should examine the nature, amount, and frequency of prior VR use to improve accuracy.

Initial Data Analysis (IDA)

Source

The data was sourced from Cosette Saunders’ paper “Socially Acquired Nocebo Effects Generalize but Are Not Attenuated by Choice”.

Structure

We specifically looked at participants who did not undergo any social modelling: 69 participants, each with 51 variables recorded. Our project focused on the following variables:

  • Baseline SSQ (BSSQ), Active SSQ (ASSQ), and SSQ_Full of 16 symptoms (quantitative, discrete): self-reported symptom severity before and after undergoing VR respectively, on a scale of 1 to 10.

  • Participants’ VRexperience (qualitative, nominal); sorted by experience to see how symptoms differed.

  • age of participants (quantitative, discrete); they were then sorted into age groups.

  • The change (\(\Delta\)) between BSSQ and ASSQ was calculated for each participant for each symptom, and we took the average \(\Delta\) for each symptom for each group (VRexperience and age_group) (quantitative, discrete).

Code
# Pie chart of the number of participants in each age group.
data_age_groups <- filteredData %>% select(age_group)
groups_counted <- count(data_age_groups, age_group)

age_pie <- plot_ly(
  groups_counted,
  labels = ~age_group,
  values = ~n,
  type = "pie",
  direction = "clockwise",
  sort = FALSE,
  rotation = 30,
  marker = list(colors = c("#A8DADC", "#FFD6A5", "#FDFFB6", "#BDE0FE"))
) %>%
  layout(
    title = "Distribution of ages",
    showlegend = TRUE,
    legend = list(title = list(text = "Age Group"))
  )
age_pie
Code
# Pie chart of the number of participants with and without VR experience.
data_experience <- filteredData %>% select(VRexperience)
exp_counted <- count(data_experience, VRexperience)

vr_pie <- plot_ly(
  exp_counted,
  labels = ~VRexperience,
  values = ~n,
  type = "pie",
  marker = list(colors = c("#FDFFB6", "#BDE0FE"))
) %>%
  layout(
    title = "Distribution of VR experience",
    showlegend = TRUE,
    legend = list(title = list(text = "VR experience"))
  )

vr_pie
Code
# Pie chart of the gender split across all participants in filteredData.
# The title carries no VR-experience qualifier because the counts come from
# the whole filtered sample, not from the "with experience" subset.
data_gender <- select(filteredData, gender)
gender_counted <- data_gender %>% count(gender)

gender_pie <- plot_ly(
  gender_counted,
  labels = ~gender,
  values = ~n,
  type = "pie",
  marker = list(colors = c("#FDFFB6", "#BDE0FE"))
)
gender_pie <- gender_pie %>%
  layout(
    title = "Distribution of Genders",
    showlegend = TRUE,
    legend = list(title = list(text = "Gender"))
  )

gender_pie
Code
# Pie chart of the age-group split among participants with VR experience.
data_experience <- withVRexperience %>% select(age_group)
exp_counted <- count(data_experience, age_group)

vr_pie_by_age <- plot_ly(
  exp_counted,
  labels = ~age_group,
  values = ~n,
  type = "pie",
  marker = list(colors = c("#A8DADC", "#FFD6A5", "#FDFFB6", "#BDE0FE"))
) %>%
  layout(
    title = "Distribution of age groups for those with VR experience",
    showlegend = TRUE,
    legend = list(title = list(text = "Age group"))
  )

vr_pie_by_age
Code
# Pie chart of the gender split among participants with VR experience.
# The counts come from withVRexperience, so the title says so explicitly.
data_gender <- select(withVRexperience, gender)
gender_counted <- data_gender %>% count(gender)

gender_pie_yes <- plot_ly(
  gender_counted,
  labels = ~gender,
  values = ~n,
  type = "pie",
  marker = list(colors = c("#FDFFB6", "#BDE0FE"))
)
gender_pie_yes <- gender_pie_yes %>%
  layout(
    title = "Distribution of Genders (With VR experience)",
    showlegend = TRUE,
    legend = list(title = list(text = "Gender"))
  )

gender_pie_yes
Code
# Pie chart of the age-group split among participants without VR experience.
data_experience <- noVRexperience %>% select(age_group)
exp_counted <- count(data_experience, age_group)

no_vr_pie_by_age <- plot_ly(
  exp_counted,
  labels = ~age_group,
  values = ~n,
  type = "pie",
  marker = list(colors = c("#A8DADC", "#FFD6A5", "#FDFFB6", "#BDE0FE"))
) %>%
  layout(
    title = "Distribution of age groups for those without VR experience",
    showlegend = TRUE,
    legend = list(title = list(text = "Age group"))
  )

no_vr_pie_by_age
Code
# Pie chart of the gender split among participants without VR experience.
data_gender <- noVRexperience %>% select(gender)
gender_counted <- count(data_gender, gender)

gender_pie_no <- plot_ly(
  gender_counted,
  labels = ~gender,
  values = ~n,
  type = "pie",
  marker = list(colors = c("#FDFFB6", "#BDE0FE"))
) %>%
  layout(
    title = "Distribution of Genders (No VR experience)",
    showlegend = TRUE,
    legend = list(title = list(text = "Gender"))
  )

gender_pie_no

Limitations

  • Self-reporting bias affects both BSSQ and ASSQ, making the values prone to over- or underestimation by each participant.

  • VRexperience is a binary qualitative classification: it does not describe the nature, amount or frequency of prior VR use, and those with more than 10 VR experiences were excluded.

Research Question

Does past VR experience affect the symptoms experienced by people after a virtual reality experience?

Code
# Box plot comparing the full SSQ score between the two VR-experience groups,
# rendered interactively through plotly.
plt <- ggplot(filteredData, aes(x = VRexperience, y = ssq_full, fill = VRexperience))
plt <- plt +
  geom_boxplot() +
  scale_fill_manual(values = c("#A8DADC", "#FFD6A5")) +
  labs(x = "VR Experience", y = "Full SSQ") +
  # theme_minimal() must come before theme(): a complete theme resets
  # earlier theme() tweaks.
  theme_minimal() +
  theme(legend.position = "none")
ggplotly(plt)

Participants with and without VR experience reported varied severity of symptoms. Interestingly, the VR-experienced group saw a greater median ssq_full and IQR, at 7.0 and 16.25 respectively, compared to -0.5 and 9.5 for those with no experience, pointing to more severe, but also less consistent, symptoms for those with VR experience. The mean and standard deviation, at 2.0 and 6.77 for “no experience” and 8.91 and 12.5 for “with experience”, also support this.

Code
library(plotly)

# Radar (spider) chart of the mean per-symptom change in SSQ rating, with one
# trace per VR-experience group.

# Mean changes for participants with VR experience, ordered to match
# symptom_labels below.
yes_means <- c(
  yes_avg_d_discomfort,
  yes_avg_d_fatigue,
  yes_avg_d_headache,
  yes_avg_d_eyestrain,
  yes_avg_d_sweating,
  yes_avg_d_nausea,
  yes_avg_d_dizziness_c,
  yes_avg_d_dizziness_o,
  yes_avg_d_difficulty_focusing,
  yes_avg_d_difficulty_concentrating,
  yes_avg_d_salivation,
  yes_avg_d_vision,
  yes_avg_d_vertigo,
  yes_avg_d_stomach,
  yes_avg_d_fullness,
  yes_avg_d_burping
)

# Mean changes for participants without VR experience, in the same order.
no_means <- c(
  no_avg_d_discomfort,
  no_avg_d_fatigue,
  no_avg_d_headache,
  no_avg_d_eyestrain,
  no_avg_d_sweating,
  no_avg_d_nausea,
  no_avg_d_dizziness_c,
  no_avg_d_dizziness_o,
  no_avg_d_difficulty_focusing,
  no_avg_d_difficulty_concentrating,
  no_avg_d_salivation,
  no_avg_d_vision,
  no_avg_d_vertigo,
  no_avg_d_stomach,
  no_avg_d_fullness,
  no_avg_d_burping
)

# Angular-axis labels, defined once and shared by both traces so the two
# traces cannot drift out of step with each other.
symptom_labels <- c(
  "Discomfort", "Fatigue", "Headache", "Eyestrain", "Sweating", "Nausea",
  "Dizziness (closed)", "Dizziness (open)", "Difficulty Focusing",
  "Difficulty Concentrating", "Salivation", "Blurry Vision", "Vertigo",
  "Stomach Awareness", "Fullness of head", "Burping"
)

# type and fill are set once here; add_trace() inherits them by default.
# The chart title is a layout property, so it is set in layout() below rather
# than passed to plot_ly() as a trace attribute.
fig <- plot_ly(
  type = "scatterpolar",
  fill = "toself"
)

fig <- fig %>%
  add_trace(
    r = yes_means,
    theta = symptom_labels,
    name = "With VR Experience"
  )

fig <- fig %>%
  add_trace(
    r = no_means,
    theta = symptom_labels,
    name = "No VR Experience"
  )

# layout(margin = ) takes a named list (l, r, t, b); none is given here, so
# plotly's default margins apply and leave room for the title.
fig <- fig %>%
  layout(
    polar = list(
      radialaxis = list(
        visible = TRUE,
        range = c(-0.5, 2)
      )
    ),
    showlegend = TRUE,
    title = "Change in Symptoms by VRexperience",
    legend = list(title = list(text = "VR experience"))
  )

fig

The spider-chart above reinforces what we see in the box-plot. Those with VR experience showed a higher mean \(\Delta\) for almost all of the 16 symptoms.

This contrasts with research suggesting users with VR experience undergo less motion sickness (Chattha et al., 2020). This could be due to the small sample size of our data, wherein the symptoms vary too much for strong correlations to be found and random chance has a noticeable effect. Additionally, users who have experienced motion sickness in VR before may anticipate feeling sick again, leading to heightened symptoms.

We stratified our analysis using a potential confounding variable: age.

Code
# Box plot of the full SSQ score by age group for participants with VR
# experience.
# coord_cartesian() zooms the y axis to [-10, 60] without discarding data;
# ylim() would drop any score outside that range before the box-plot
# statistics are computed.
plt2 <- ggplot(withVRexperience, aes(x = age_group, y = ssq_full, fill = age_group)) +
  geom_boxplot() +
  labs(title = "Full SSQ for each age group (With experience)", y = "Full SSQ", x = "Age Group") +
  coord_cartesian(ylim = c(-10, 60)) +
  scale_fill_manual(values = c("#A8DADC", "#FFD6A5", "#FDFFB6", "#BDE0FE")) +
  theme(legend.position = "none")

ggplotly(plt2)
Code
# Box plot of the full SSQ score by age group for participants without VR
# experience.
# coord_cartesian() zooms the y axis to [-10, 60] without discarding data;
# ylim() would drop any score outside that range before the box-plot
# statistics are computed.
plt2 <- ggplot(noVRexperience, aes(x = age_group, y = ssq_full, fill = age_group)) +
  geom_boxplot() +
  labs(title = "Full SSQ Scores for each age group (No experience)", y = "Full SSQ", x = "Age Group") +
  coord_cartesian(ylim = c(-10, 60)) +
  scale_fill_manual(values = c("#A8DADC", "#FFD6A5", "#FDFFB6", "#BDE0FE")) +
  theme(legend.position = "none")

ggplotly(plt2)
Code
library(tidyverse)
library(plotly)

# Box plot of the full SSQ score by age group, with the two VR-experience
# groups drawn side by side.
# coord_cartesian() zooms the y axis to [-10, 60] without discarding data;
# ylim() would drop any score outside that range before the box-plot
# statistics are computed.
# NOTE(review): group = VRexperience overrides ggplot2's default grouping by
# x and fill; the per-age-group boxes appear to rely on plotly's
# boxmode = "group" below - confirm before rendering with plain ggplot2.
plt <- ggplot(filteredData, aes(x = as.factor(age_group), y = ssq_full, group = VRexperience, fill = VRexperience)) +
  geom_boxplot(position = "dodge") +
  labs(x = "Age Group", y = "Full SSQ", title = "Full SSQ Scores by Age Group (both groups)") +
  coord_cartesian(ylim = c(-10, 60)) +
  scale_fill_manual(values = c("#A8DADC", "#FFD6A5", "#FDFFB6", "#BDE0FE"))

ggplotly(plt) %>% layout(boxmode = "group")
Code
# Box plot of the full SSQ score by age group across all participants in
# filteredData.
# coord_cartesian() zooms the y axis to [-10, 60] without discarding data;
# ylim() would drop any score outside that range before the box-plot
# statistics are computed.
plt2 <- ggplot(filteredData, aes(x = age_group, y = ssq_full, fill = age_group)) +
  geom_boxplot() +
  labs(title = "Full SSQ Scores for each age group", y = "Full SSQ", x = "Age Group") +
  coord_cartesian(ylim = c(-10, 60)) +
  scale_fill_manual(values = c("#A8DADC", "#FFD6A5", "#FDFFB6", "#BDE0FE")) +
  theme(legend.position = "none")

ggplotly(plt2)

The medians are much lower for those with no experience among 16-37 year-olds, reflecting the above observations. The medians for 38-45 year-olds are equal.

However, the group with highest median SSQ was 22-29 year-olds, at 9.0 compared to the other groups’ medians which were either 1.0 or 2.0. Since the 22-29 year-olds made up 24.3% of the “with experience” group, compared to just 15.6% of the “no experience” group, this might explain the higher median and mean SSQs observed for those with VR experience.

Yet, these observations conflict with research suggesting older users experience more severe cyber-sickness (Oh & Son, 2022). This is possibly due to our data having unequal sample sizes across age groups. Namely, there are 29 participants in the 30-37 age group but only 7 in the 16-21 age group. This limits the reliability of comparisons.

Finally, we filtered by gender to investigate it as a potential confounder.

Code
library(tidyverse)
library(plotly)

# Box plot of the full SSQ score by gender, with the two VR-experience groups
# drawn side by side.
# NOTE(review): group = VRexperience overrides ggplot2's default grouping by
# x and fill; the per-gender boxes appear to rely on plotly's
# boxmode = "group" below - confirm before rendering with plain ggplot2.
plt <- filteredData %>%
  ggplot(aes(x = as.factor(gender), y = ssq_full, group = VRexperience, fill = VRexperience)) +
  geom_boxplot() +
  scale_fill_manual(values = c("#A8DADC", "#FFD6A5")) +
  labs(x = "Gender", y = "Full SSQ")

ggplotly(plt) %>%
  layout(boxmode = "group")
Code
library(tidyverse)
library(plotly)

# Box plot of the full SSQ score by gender across all participants in
# filteredData; the legend is hidden because the x axis already names the
# groups.
plt <- filteredData %>%
  ggplot(aes(x = as.factor(gender), y = ssq_full, fill = gender)) +
  geom_boxplot() +
  scale_fill_manual(values = c("#A8DADC", "#FFD6A5")) +
  labs(x = "Gender", y = "Full SSQ") +
  theme(legend.position = "none")

ggplotly(plt)

Among those with VR experience, females report greater symptoms, with a median of 8.0 compared to 5.0 for males, agreeing with research showing that females experience more severe motion sickness than males (Chattha et al., 2020). Without VR experience, however, this is contradicted, with a median of -1.0 for females and 0.0 for males. For both genders, the group with experience still showed higher full SSQ scores than its counterpart with no experience.

Professional Standard of Report

We maintained a shared value of respect as we respected the privacy of others and the promises of confidentiality given to them as the data we used was anonymous. We obeyed ethical principles by pursuing objectivity as we presented all findings holistically, and in a transparent manner irrespective of outcomes.

Acknowledgements

Group Meetings

13 March, 13:00

In attendance: Viet, Borna, Gokul (Ryan), Mohamed

We confirmed our research question and the variables we will be using for analysis, discussed our ideas on what types of graphs we can use to present the data, and decided on roles for our group members to take up over the next few weeks.

We also had group discussions at every Tuesday workshop where we would discuss progress we have made over the past week, what has been completed, what still needs to be completed and our plan of action for the next week.

Resources Used

Our group collaborated on the project using Git and GitHub (https://github.com/uncoolbeans/DATA1901_project_1)

Additionally, for our IDA and to make charts using R, we accessed documentation for:

AI Usage

AI (ChatGPT) was used to clean up our code for one version pushed to GitHub. This was overwritten in subsequent versions. In our final submission, there is no use of any AI.

Drop-in Session(s)

Our group sent a representative (Ryan) for a drop-in session at Carslaw 535 at 3pm on April 10th to get some feedback regarding the depth of our graphs and if they were enough to back up our analysis points. We received feedback that we should include more titles and label our legend for the graphs, which we subsequently implemented.

References

Chattha, U. A., Janjua, U. I., Anwar, F., Madni, T. M., Cheema, M. F., & Janjua, S. I. (2020). Motion Sickness in Virtual Reality: An Empirical Evaluation. IEEE Access, 8, 130486–130499. https://doi.org/10.1109/access.2020.3007076

‌Oh, H., & Son, W. (2022). Cybersickness and Its Severity Arising from Virtual Reality Content: A Comprehensive Study. Sensors, 22(4), 1314. https://doi.org/10.3390/s22041314